Questions

Use your own data. Produce the following types of plots and comment on each plot. The plots should be meaningful. If you use the data we used in class, make sure the plots are not the same as the ones in the slides. All plots should have a title, a caption, and appropriate labels on the x- and y-axes.


  1. Use the WHO’s dataset at this link. Make a top-10 bar race by month that compares countries on the number of COVID-19 deaths.
library(tidyverse)
## -- Attaching packages ----------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts -------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Download the WHO global COVID-19 table directly from its URL.
covid <- read.csv(file = "https://covid19.who.int/WHO-COVID-19-global-data.csv")

library(gganimate)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Bar race 1: top-10 countries by average daily new deaths, one animation
# state per calendar month.

# read.csv() can leave byte-order-mark residue on the first header (the name
# then looks like "ï..Date_reported"). Locate the date column by its suffix so
# the script works whether or not the header was mangled.
date_col <- grep("Date_reported$", names(covid), value = TRUE)[1]
stopifnot(!is.na(date_col))

# NOTE(review): month() keeps only the month number (1-12); if the data spans
# more than one year, the same month of different years is pooled together.
covid$month <- month(covid[[date_col]])

# Average of the daily new-death counts per country and month.
covid1 <- covid %>%
  group_by(Country, month) %>%
  summarise(mean = mean(New_deaths, na.rm = TRUE), .groups = "drop")

# Rank countries within each month (1 = highest average). Ties are broken by
# order of appearance so every bar gets a distinct integer slot; the default
# average-rank method yields fractional, overlapping positions.
covid2 <- covid1 %>%
  group_by(month) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  ungroup()

# Keep the ten highest-ranked countries of every month.
covid3 <- covid2 %>%
  filter(rank <= 10)

covid_final <- covid3 %>%
  ggplot(aes(x = rank, y = mean, group = Country, fill = Country)) +
  geom_col() +
  geom_text(aes(label = Country), hjust = 1.4) +
  # Flip to horizontal bars and reverse the axis so rank 1 sits on top.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month: {closest_state}",
    caption = "Data source: WHO COVID-19 global data",
    x = "",
    y = "Average Number of New Deaths",
    fill = "Country"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(covid_final, nframes = 400, fps = 10)

  2. Make another bar race using the same WHO dataset.
# Bar race 2: top-10 country codes by total new deaths per calendar month.
# Relies on the `month` column already present on `covid`.

# Monthly total of the daily new-death counts per country code.
covid4 <- covid %>%
  group_by(Country_code, month) %>%
  summarise(sum = sum(New_deaths, na.rm = TRUE), .groups = "drop")

# Rank within each month (1 = largest total). Ties are broken by order of
# appearance so every bar gets a distinct integer slot; the default
# average-rank method yields fractional, overlapping positions.
covid5 <- covid4 %>%
  group_by(month) %>%
  mutate(rank = rank(-sum, ties.method = "first")) %>%
  ungroup()

# Keep the ten highest-ranked country codes of every month.
covid6 <- covid5 %>%
  filter(rank <= 10)

covid_final2 <- covid6 %>%
  ggplot(aes(x = rank, y = sum, group = Country_code, fill = Country_code)) +
  geom_col() +
  geom_text(aes(label = Country_code), hjust = 1.4) +
  # Flip to horizontal bars and reverse the axis so rank 1 sits on top.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month: {closest_state}",
    caption = "Data source: WHO COVID-19 global data",
    x = "",
    y = "Total Number of New Deaths",
    fill = "Country_code"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(covid_final2, nframes = 400, fps = 10)

  3. Make a bar race using a dataset of your own interest.
# Load the county-level table from a local CSV (absolute, machine-specific path).
state <- read_csv(file = "C:\\Users\\student\\Desktop\\Statistical Analysis with R\\us_counties_covid19_daily.csv")
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_double(),
##   cases = col_double(),
##   deaths = col_double()
## )
# Bar race 3: highest-ranked US counties by average case count per calendar
# month.

# NOTE(review): month() keeps only the month number (1-12); if the data spans
# more than one year, the same month of different years is pooled together.
state$month <- month(state$date)

# County names repeat across states, so the state is part of the grouping key;
# grouping by county name alone would average unrelated counties together.
# The `county` column is then rewritten as "County, State" so labels and
# legend entries stay unambiguous.
# NOTE(review): confirm whether `cases` is a daily or a cumulative count; the
# axis label below says "New Cases".
state1 <- state %>%
  group_by(county, state, month) %>%
  summarise(mean = mean(cases, na.rm = TRUE), .groups = "drop") %>%
  mutate(county = paste(county, state, sep = ", "))

# Rank within each month (1 = highest average). Ties are broken by order of
# appearance so every bar gets a distinct integer slot; the default
# average-rank method yields fractional, overlapping positions.
state2 <- state1 %>%
  group_by(month) %>%
  mutate(rank = rank(-mean, ties.method = "first")) %>%
  ungroup()

# `rank < 5` keeps ranks 1-4 of every month.
# NOTE(review): the WHO races use `rank <= 10`; confirm a top-4 cut-off is
# what is intended here.
state3 <- state2 %>%
  filter(rank < 5)

state_final <- state3 %>%
  ggplot(aes(x = rank, y = mean, group = county, fill = county)) +
  geom_col() +
  geom_text(aes(label = county), hjust = 1.4) +
  # Flip to horizontal bars and reverse the axis so rank 1 sits on top.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  labs(
    title = "Month: {closest_state}",
    caption = "Data source: us_counties_covid19_daily.csv",
    x = "",
    y = "Average Number of New Cases by County",
    fill = "county"
  ) +
  theme(
    plot.title = element_text(hjust = 1, size = 22),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  transition_states(month) +
  ease_aes("cubic-in-out")

animate(state_final, nframes = 400, fps = 10)